# -*- coding: utf-8 -*-
import logging
import random
import threading
import time
import requests
from scrapy.utils.project import get_project_settings

from zc_core.util.http_util import match_proxy

logger = logging.getLogger(__name__)


class MoguProxyPool(object):
    """Local proxy pool for crawlers, shared process-wide as a singleton.

    Every ``MoguProxyPool(...)`` call returns the same instance (see
    ``__new__``); only the first successful ``__init__`` configures it, so
    ``settings`` passed on later calls are ignored.
    """
    # Guards singleton creation, one-time initialisation and ``reload``.
    _instance_lock = threading.Lock()
    # Set to True on the instance once ``__init__`` has fully completed.
    _biz_inited = False

    def __init__(self, settings=None):
        """Configure the pool on first construction.

        :param settings: object exposing ``get(key, default)``; when falsy the
            Scrapy project settings are loaded via ``get_project_settings()``.
        """
        if self._biz_inited:
            return
        with self._instance_lock:
            # Re-check under the lock: another thread may have finished the
            # initialisation while this one was waiting.
            if self._biz_inited:
                return
            if not settings:
                settings = get_project_settings()
            # Locally cached proxies
            self.pool = list()
            # Minimum interval between two fetch requests (seconds)
            self.min_reload_period = settings.get('MIN_RELOAD_PERIOD', 5)
            # Minimum number of proxies to keep in the pool
            self.min_pool_size = settings.get('MIN_PROXY_POOL_SIZE', 3)
            # Number of proxies requested per fetch
            self.proxy_amount_pre_load = settings.get('PROXY_AMOUNT_PRE_LOAD', 1)
            # Fetch URL
            # NOTE(review): the appKey credential is hard-coded here; consider
            # moving it into settings.
            self.pool_url = 'http://piping.mogumiao.com/proxy/api/get_ip_al?appKey=07e9aeb9a2c342aa8907eba481a0e200&count={}&expiryDate=0&format=2&newLine=2'.format(self.proxy_amount_pre_load)
            # Time of the last successful fetch (epoch seconds)
            self._last_load_time = 0
            # Mark as initialised only once every attribute exists, so other
            # threads never see a half-built instance and a failed first
            # initialisation can be retried.
            self._biz_inited = True

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if not hasattr(MoguProxyPool, "_instance"):
            with MoguProxyPool._instance_lock:
                if not hasattr(MoguProxyPool, "_instance"):
                    MoguProxyPool._instance = object.__new__(cls)
        return MoguProxyPool._instance

    def get_proxy(self):
        """Return a random proxy, topping the pool up first when it is short.

        :raises IndexError: if the pool is still empty after the reload
            attempt (raised by ``random.choice``).
        """
        if not self.pool or len(self.pool) < self.min_pool_size:
            # Initial fill / top-up
            self.reload()
        return random.choice(self.pool)

    def remove_proxy(self, proxy):
        """Drop ``proxy`` from the pool (if present) and top up when short."""
        if self.pool and proxy in self.pool:
            # Remove the proxy
            self.pool.remove(proxy)
            logger.info("移除代理: %s", proxy)
        # Too few proxies left: trigger a reload
        if not self.pool or len(self.pool) < self.min_pool_size:
            self.reload()

    def reload(self):
        """Fetch proxies from ``pool_url`` and append them to the pool.

        Runs under the class-wide lock. If the lock cannot be obtained within
        30 seconds the fetch is skipped. Exceptions raised by the HTTP request
        propagate to the caller; the lock is released in every case.

        :return: True if the pool holds at least one proxy afterwards.
        """
        if not self._instance_lock.acquire(timeout=30):
            # The lock is not ours: do not touch shared state and, above all,
            # do not release a lock held by another thread.
            logger.warning("等待代理池锁超时, 跳过本次加载")
            return len(self.pool) > 0
        try:
            # Enough proxies already (e.g. another thread just reloaded)
            if self.pool and len(self.pool) >= self.min_pool_size:
                logger.info("无需加载代理: %s", len(self.pool))
                return True
            # Throttle: avoid hitting the API too frequently
            if (int(time.time()) - self._last_load_time) <= self.min_reload_period:
                time.sleep(10)
            # Fetch proxies
            response = requests.get(self.pool_url, timeout=15)
            if response.status_code == 200 and response.text:
                if 'code' not in response.text and 'msg' not in response.text:
                    rows = response.text.strip().split('\r\n')
                    for row in rows:
                        ip_port = match_proxy(row.strip())
                        if ip_port:
                            self.pool.append(ip_port)
                        else:
                            logger.error("代理数据异常: %s", row)
                    self._last_load_time = int(time.time())
                    logger.info("加载代理: current=%s, total=%s", rows, len(self.pool))
                else:
                    # The body contains 'code' or 'msg': treated as an API
                    # error payload; back off before the next attempt.
                    logger.error("代理提取异常: %s", response.text)
                    time.sleep(10)
            else:
                logger.error("代理提取失败: status=%s", response.status_code)
        finally:
            # Always release, even when the request or parsing raised;
            # otherwise every later reload would block on the leaked lock.
            self._instance_lock.release()

        return len(self.pool) > 0
